*------------------------------------------------------------------------------
* Dollars, Desks, and Development: The Impact of Local Sales Tax Revenue Redistribution on Recipient Programs in North Carolina

* Authors: Alex Combs and Whitney Afonso

* Imports and cleans Annual Financial Information Reports (AFIR) data
*------------------------------------------------------------------------------
clear

import excel "AFIR Counties Master.xlsx", clear ///
	sheet("2012-2019") cellrange(A3:ZS802)
	
keep A-K Q U V W EU WP-WT ZS

destring C-ZS, force replace

rename A year
rename B cty_name
rename C prop_tax
rename D other_tax
rename E sales_tax
rename F sales_services
rename G intergov
rename H debt_proceeds
rename I other_misc
rename J total

drop if year==2012

foreach var of varlist prop_tax-total {
	rename `var' rev_`var'
}

rename K exp_education
rename Q exp_total

rename U expobj_school_capital
rename V expobj_school_currentopt
rename W expobj_school_allother

rename EU capital524use
replace capital524use = 0 if year<=2017

* Other school exp not reported in W before 2015
* If county reports values in W and not ZS for 2015+, fill 2013 and 2014 with col ZS
sort cty_name year
gen if_other = (expobj_school_allother>0) if expobj_school_allother!=. & year>=2015
bysort cty_name: egen tot_if_other = total(if_other)
gen ever_if_other = (tot_if_other>0)
gen ifnot_ZS = (ZS==0 & year>=2015) if ZS!=.
bysort cty_name: egen tot_ifnot_ZS = total(ifnot_ZS)
gen observedZS = (ZS!=.) if year>=2015
bysort cty_name: egen tot_obsZS = total(observedZS)
gen ZStoW_switch = (ever_if_other==1 & tot_ifnot_ZS==tot_obsZS)

replace expobj_school_allother = ZS if expobj_school_allother==. & /// 
 expobj_school_currentopt!=. & ZS!=. & year<=2014 & ZStoW_switch==1

replace expobj_school_allother = 0 if expobj_school_allother==. & ///
 expobj_school_currentopt!=. & year<=2014
 
sum expobj_school_capital expobj_school_currentopt expobj_school_allother, d

* Corrections to K12 AFIR data
* 4 observations appear to be data errors based on current operating values
* 4 counties have a negative or zero value; all other years positive
// Clay 2019 financial audit (pg. 85)
replace expobj_school_currentopt = 1603053 if cty_name=="Clay" & year==2019
replace expobj_school_capital = 1193448 if cty_name=="Clay" & year==2019
// Duplin 2018 financial audit (pg. 67 and 69) 
replace expobj_school_capital = 13358154 if cty_name=="Duplin" & year==2018
replace expobj_school_currentopt = 8801000 if cty_name=="Duplin" & year==2018
// Jones AFIR subcategories and 2017 audit (pg. 76)
replace expobj_school_currentopt = 1812820 if cty_name=="Jones" & year==2017
replace expobj_school_allother = 0 if cty_name=="Jones" & year==2017
// Stokes reported current in other 
replace expobj_school_currentopt = expobj_school_allother if cty_name=="Stokes" & year==2016
replace expobj_school_allother = 0 if cty_name=="Stokes" & year==2016

* Community college expenditures
* Education total includes K12 and community college
* Take difference between education and K12
gen exp_commcoll = exp_education - expobj_school_capital - expobj_school_currentopt if year<=2014
replace exp_commcoll = exp_education - expobj_school_capital - expobj_school_currentopt - expobj_school_allother if year>=2015

sum exp_commcoll, d

* Econ development expenditures
gen exp_econdev_pymt = WP + WQ
drop WP WQ 
rename WR exp_econdev_opt
gen exp_econdev_capital = WS + WT
drop WS WT
gen exp_econdev = exp_econdev_pymt + exp_econdev_opt + exp_econdev_capital

sum exp_econdev, d

// missing more than other vars
// supplement with NC LINC data

save nc_afir, replace

* NC LINC data https://www.osbm.nc.gov/facts-figures/linc
import delimited "cty_spending.csv", clear

/*
- 8 years and 100 counties = 800 observations but data has 805 observations
- Why additional 5 obs?
- One observation for Gibsonville (drop)
- Four observations (2017-2020) where there are two observations for
	McDowell and Mcdowell. Values for some variables are split between these
	two variables where McDowell is NA and Mcdowell is not NA. (reconcile)
*/

drop if county == "Gibsonville"

forvalues y = 2017/2020 {
	foreach var of varlist ad_valorem_property_tax_revenues-county_government_october_payrol {
		replace `var' = `var'[_n+4] if `var'=="NA" & `var'[_n+4] != "NA" /// 
		& county== "McDowell County" & year == `y'
	}
} 

drop if county == "Mcdowell County"

// 800 observation

* Destring numberical variables

destring ad_valorem_property_tax_revenues-county_government_october_payrol, force replace

* Name variables that were too long on import

rename v18 exp_public_school_current_ops
rename v22 intergov_exp_comm_colleges
rename v26 intergov_exp_public_schools
rename v65 abc_mix_bev_tax_to_hum_res

* Edit county names for merge

split county
rename county1 cty_name
replace cty_name = "New Hanover" if cty_name == "New"
drop county2 county3

* Data error for econ dev in LINC data

replace economic_physical_development_ex = culture_and_recreation_expenditu ///
	if year>2014

save nc_linc, replace

* Merge LINC to AFIR data
use nc_afir, clear

merge 1:1 cty_name year using nc_linc, keepusing(economic_physical_development_ex)
drop if _merge==2
drop _merge

replace exp_econdev = economic_physical_development_ex if exp_econdev==. /// 
 & economic_physical_development_ex!=.
 
// LINC uses zeros in place of missing data; should have same missing numbers
// as AFIR

replace exp_econdev = . if rev_prop_tax==.

drop economic_physical_development_ex
drop exp_econdev_opt-ZStoW_switch
drop exp_econdev_pymt exp_econdev_capital

replace rev_total = . if rev_total==0
replace exp_total = . if exp_total==0

replace cty_name = strupper(cty_name)

save nc_afir, replace
